Create the weather data

# Pull 2017 precipitation and min/max temperature records for three NOAA
# monitors, attach a readable station name, and rescale the temperatures.
weather_df = 
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2017-01-01",
    date_max = "2017-12-31"
  ) %>%
  mutate(
    # Map each monitor id to a human-readable station label.
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    ),
    # NOTE(review): presumably the raw values are tenths of a degree C -- confirm.
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  # Put the identifying columns first, keep everything else after them.
  select(name, id, everything())
## file path:          /Users/SigL/Library/Caches/rnoaa/ghcnd/USW00094728.dly
## file last updated:  2019-09-26 10:25:27
## file min/max dates: 1869-01-01 / 2019-09-30
## file path:          /Users/SigL/Library/Caches/rnoaa/ghcnd/USC00519397.dly
## file last updated:  2019-09-26 10:25:41
## file min/max dates: 1965-01-01 / 2019-09-30
## file path:          /Users/SigL/Library/Caches/rnoaa/ghcnd/USS0023B17S.dly
## file last updated:  2019-09-26 10:25:46
## file min/max dates: 1999-09-01 / 2019-09-30
# Print the tibble to inspect the first rows and the column types.
weather_df
## # A tibble: 1,095 x 6
##    name           id          date        prcp  tmax  tmin
##    <chr>          <chr>       <date>     <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2017-01-01     0   8.9   4.4
##  2 CentralPark_NY USW00094728 2017-01-02    53   5     2.8
##  3 CentralPark_NY USW00094728 2017-01-03   147   6.1   3.9
##  4 CentralPark_NY USW00094728 2017-01-04     0  11.1   1.1
##  5 CentralPark_NY USW00094728 2017-01-05     0   1.1  -2.7
##  6 CentralPark_NY USW00094728 2017-01-06    13   0.6  -3.8
##  7 CentralPark_NY USW00094728 2017-01-07    81  -3.2  -6.6
##  8 CentralPark_NY USW00094728 2017-01-08     0  -3.8  -8.8
##  9 CentralPark_NY USW00094728 2017-01-09     0  -4.9  -9.9
## 10 CentralPark_NY USW00094728 2017-01-10     0   7.8  -6  
## # … with 1,085 more rows

Cache: rnoaa saves the downloaded data files to a local cache directory, so they are not re-downloaded every time the code runs.

Initial plot

# Scatterplot of maximum vs. minimum temperature, one color per station.
ggplot(data = weather_df, mapping = aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = .5)
## Warning: Removed 15 rows containing missing values (geom_point).

Add labels

captions and titles

# Same scatterplot, now with a title, axis labels, and a caption via labs().
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax, color = name)) + 
  geom_point(alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum Temp (C)",
    y = "Maximum Temp (C)",
    caption = "Data from the rnoaa package"
  )
## Warning: Removed 15 rows containing missing values (geom_point).

Tick marks and labels (customizing the scale on an axis)

# Control where the x-axis tick marks fall and what text is shown at each one.
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax)) + 
  geom_point(aes(color = name), alpha = .5) + 
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    caption = "Data from the rnoaa package") + 
  scale_x_continuous(
    breaks = c(-15, 0, 15), 
    labels = c("-15 (too cold)", "0", "15")
    )
## Warning: Removed 15 rows containing missing values (geom_point).

sqrt trans

# Custom x breaks and limits, plus a square-root y scale drawn on the right.
# sqrt() is undefined for negative tmax values, so those points are dropped
# together with any outside the x limits (see the warnings printed below).
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax)) + 
  geom_point(aes(color = name), alpha = .5) + 
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    caption = "Data from the rnoaa package") + 
  scale_x_continuous(
    breaks = c(-15, 0, 15), 
    labels = c("-15ºC", "0", "15"),
    limits = c(-20, 30)) + 
  scale_y_continuous(
    trans = "sqrt", 
    position = "right")
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 90 rows containing missing values (geom_point).

Colors

# Rename the color legend and restrict the hue range used for the stations.
weather_df %>% 
  ggplot(aes(x = tmin, y = tmax)) + 
  geom_point(aes(color = name), alpha = .5) + 
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    caption = "Data from the rnoaa package") + 
  scale_color_hue(
    name = "Location", 
    h = c(100, 300)
    )
## Warning: Removed 15 rows containing missing values (geom_point).

# Base plot reused in the theme examples: labelled scatterplot with a
# discrete viridis color scale whose legend is titled "Location".
ggp_base = weather_df %>% 
  ggplot(aes(x = tmin, y = tmax)) + 
  geom_point(aes(color = name), alpha = .5) + 
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    y = "Maximum daily temperature (C)",
    caption = "Data from the rnoaa package"
    ) + 
  viridis::scale_color_viridis(
    name = "Location", 
    discrete = TRUE
    )

Themes

# Apply the black-and-white theme, then move the legend below the plot.
ggp_base + theme_bw() + theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (geom_point).

# Order matters: add the complete theme first and the legend tweak second,
# because a complete theme replaces theme settings added before it.
ggp_base + theme_minimal() + theme(legend.position = "bottom")
## Warning: Removed 15 rows containing missing values (geom_point).

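theme_bw() is the black-and-white theme. A complete theme such as theme_bw() or theme_minimal() has to come first; if it is added after theme(legend.position = "bottom"), it resets the theme and the legend will not move to the bottom.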

(Where do my warnings come from)

# Show the rows with a missing tmin; these are the points the plots drop.
filter(weather_df, is.na(tmin))
## # A tibble: 15 x 6
##    name       id          date        prcp  tmax  tmin
##    <chr>      <chr>       <date>     <dbl> <dbl> <dbl>
##  1 Waikiki_HA USC00519397 2017-04-17     5  28.3    NA
##  2 Waikiki_HA USC00519397 2017-05-09    NA  NA      NA
##  3 Waikiki_HA USC00519397 2017-05-26    NA  NA      NA
##  4 Waikiki_HA USC00519397 2017-07-19    NA  NA      NA
##  5 Waikiki_HA USC00519397 2017-10-07     0  31.1    NA
##  6 Waikiki_HA USC00519397 2017-10-09     0  28.9    NA
##  7 Waikiki_HA USC00519397 2017-10-10    10  31.7    NA
##  8 Waikiki_HA USC00519397 2017-10-12     0  31.1    NA
##  9 Waikiki_HA USC00519397 2017-10-13     0  31.1    NA
## 10 Waikiki_HA USC00519397 2017-10-16     5  30      NA
## 11 Waikiki_HA USC00519397 2017-10-18     0  29.4    NA
## 12 Waikiki_HA USC00519397 2017-10-20    13  30.6    NA
## 13 Waikiki_HA USC00519397 2017-10-21     0  30      NA
## 14 Waikiki_HA USC00519397 2017-10-22     0  30      NA
## 15 Waikiki_HA USC00519397 2017-12-22     0  26.7    NA

library(tidyverse)

# Default figure size for every knitted chunk.
knitr::opts_chunk$set(
  fig.width = 6,
  fig.asp = .6,
  out.width = "90%"
)

# Use the black-and-white theme with a bottom legend for all later plots.
theme_set(theme_bw() + theme(legend.position = "bottom"))

There are ways to set color preferences globally as well (for example, to use viridis color palettes everywhere), although they’re a bit more involved.

more than one dataset

# Split out one station per data frame so each can be drawn by its own geom.
central_park = filter(weather_df, name == "CentralPark_NY")

waikiki = filter(weather_df, name == "Waikiki_HA")

# Waikiki is drawn as points; geom_line() is given the Central Park data and
# reuses the x / y / color aesthetics declared in ggplot().
ggplot(waikiki, aes(x = date, y = tmax, color = name)) +
  geom_point() +
  geom_line(data = central_park)
## Warning: Removed 3 rows containing missing values (geom_point).

(brief aside about colors)

# Here color = "red" sits inside aes(), so "red" is treated as a data value
# mapped through the color scale rather than as the literal color to draw.
ggplot(waikiki, aes(x = date, y = tmax, color = "red")) +
  geom_point()
## Warning: Removed 3 rows containing missing values (geom_point).

# Setting color as a plain geom argument (outside aes()) draws red points.
ggplot(waikiki, aes(x = date, y = tmax)) +
  geom_point(color = "red", alpha = .5)
## Warning: Removed 3 rows containing missing values (geom_point).

Multi-panel plots

# Three stand-alone plots that are combined into one figure afterwards.

# tmax against tmin for all stations.
ggp_scatter = ggplot(weather_df, aes(x = tmin, y = tmax)) +
  geom_point()

# Distribution of tmin across all stations.
ggp_density = ggplot(weather_df, aes(x = tmin)) +
  geom_density()

# tmax by station, one colored box per station.
ggp_box = ggplot(weather_df, aes(x = name, y = tmax, color = name)) +
  geom_boxplot()

Use the patchwork package to put the plots together.

# Combine the three plots: `+` puts the scatter and density plots side by
# side, and `/` stacks the boxplot underneath them.
# NOTE(review): these operators come from patchwork, which is not attached in
# the visible code -- confirm it is loaded in the setup chunk.
(ggp_scatter + ggp_density) / ggp_box
## Warning: Removed 15 rows containing missing values (geom_point).
## Warning: Removed 15 rows containing non-finite values (stat_density).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

Data manipulation

factor variables

 # Put Waikiki first and Central Park second on the x axis by releveling name.
 weather_df %>% 
  mutate(
    name = fct_relevel(factor(name), "Waikiki_HA", "CentralPark_NY")
  ) %>% 
  ggplot(aes(x = name, y = tmax, color = name)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

 # Order the stations on the x axis by their median tmax.
 weather_df %>% 
  mutate(
    name = factor(name),
    # tmax contains missing values; na.rm = TRUE is forwarded to median() so
    # a station's summary is computed from its observed values, not NA.
    name = fct_reorder(name, tmax, na.rm = TRUE)
  ) %>% 
  ggplot(aes(x = name, y = tmax, color = name)) +
  geom_boxplot()
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).

restructure then plot

When creating a scatterplot, to change which points are drawn on top (bringing “Central Park” to the front instead of “Waikiki”), we have to reorder the rows of the dataset itself; reordering the factor levels with “fct_reorder” is not enough.

# Stack tmax and tmin into a single temperature column so both densities
# can be overlaid, with one panel per station.
weather_df %>% 
  pivot_longer(
    cols = tmax:tmin,
    names_to = "observation",
    values_to = "temperature"
  ) %>% 
  ggplot(mapping = aes(x = temperature, fill = observation)) +
  geom_density(alpha = .5) +
  facet_grid(. ~ name) +
  theme(legend.position = "bottom")
## Warning: Removed 18 rows containing non-finite values (stat_density).

As a final example, we’ll revisit the FAS data. We’ve seen code for data import and organization and for joining the litters and pups data. Here we add some data tidying steps to view pup-level outcomes (post-natal day on which ears “work”, on which the pup can walk, etc) across values of dose category and treatment day.

# Read the pup-level FAS data, clean the column names, and replace the
# sex codes 1 / 2 with the labels "male" / "female".
pup_data = 
  read_csv("./data/FAS_pups.csv", col_types = "ciiiii") %>%
  janitor::clean_names() %>%
  mutate(
    sex = recode(sex, `1` = "male", `2` = "female")
  )

# Read the litter-level FAS data, drop pups_survive, split `group` after its
# third character into dose and treatment day, and derive weight gain.
litter_data = 
  read_csv("./data/FAS_litters.csv", col_types = "ccddiiii") %>%
  janitor::clean_names() %>%
  select(-pups_survive) %>%
  separate(
    group,
    into = c("dose", "day_of_tx"),
    sep = 3
  ) %>%
  mutate(
    wt_gain = gd18_weight - gd0_weight,
    # separate() leaves day_of_tx as text; convert it to a number.
    day_of_tx = as.numeric(day_of_tx)
  )

# Attach the litter-level columns to every pup, keeping all pups.
fas_data = pup_data %>% left_join(litter_data, by = "litter_number")

# Violin plots of the post-natal day on which each outcome is reached, by
# dose, faceted by treatment day (rows) and outcome (columns).
fas_data %>% 
  select(sex, dose, day_of_tx, pd_ears:pd_walk) %>% 
  pivot_longer(
    pd_ears:pd_walk,
    names_to = "outcome", 
    values_to = "pn_day") %>% 
  drop_na() %>% 
  # Order the outcome panels by the median day each outcome occurs (pn_day);
  # day_of_tx is a litter-level treatment day and carries no information
  # about when an outcome happens.
  mutate(outcome = forcats::fct_reorder(outcome, pn_day, median)) %>% 
  ggplot(aes(x = dose, y = pn_day)) + 
  geom_violin() + 
  facet_grid(day_of_tx ~ outcome)

This is more of a data-tidying problem than a ggplot problem.